Martyna Stasiak id.156071
Image segmentation based on:
%matplotlib inline
import glob
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import plotly.graph_objects as go
from IPython.display import HTML, display
from matplotlib.colors import ListedColormap
from pandas import DataFrame
from sklearn.mixture import GaussianMixture
pd.options.display.html.border = 0
pd.options.display.float_format = '{:,.2f}'.format
%pip install wget
Requirement already satisfied: wget in c:\users\mmart\appdata\local\programs\python\python310\lib\site-packages (3.2) Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip is available: 24.2 -> 24.3.1 [notice] To update, run: python.exe -m pip install --upgrade pip
import wget
"""
wget.download("http://www.lenna.org/lena_std.tif", "lena_std.tif")
wget.download("https://cs.stanford.edu/people/jcjohns/clevr/teaser.jpg", "clevr.jpg")
wget.download("https://github.com/opencv/opencv/raw/master/samples/data/graf1.png", "graf.png")
wget.download("https://raw.githubusercontent.com/opencv/opencv/master/samples/data/sudoku.png", "sudoku.png")
for i in range(100, 111):
url = f"https://github.com/possibly-wrong/skittles/blob/master/images/{i}.jpg?raw=true"
wget.download(url, f"skittles{i}.jpg")
"""
'\nwget.download("http://www.lenna.org/lena_std.tif", "lena_std.tif")\nwget.download("https://cs.stanford.edu/people/jcjohns/clevr/teaser.jpg", "clevr.jpg")\nwget.download("https://github.com/opencv/opencv/raw/master/samples/data/graf1.png", "graf.png")\nwget.download("https://raw.githubusercontent.com/opencv/opencv/master/samples/data/sudoku.png", "sudoku.png")\n\n\nfor i in range(100, 111):\n url = f"https://github.com/possibly-wrong/skittles/blob/master/images/{i}.jpg?raw=true"\n wget.download(url, f"skittles{i}.jpg")\n \n'
# download images
!wget -O lena_std.tif http://www.lenna.org/lena_std.tif
!wget -O clevr.jpg https://cs.stanford.edu/people/jcjohns/clevr/teaser.jpg
!wget -O graf.png https://github.com/opencv/opencv/raw/master/samples/data/graf1.png
!wget -O sudoku.png https://raw.githubusercontent.com/opencv/opencv/master/samples/data/sudoku.png
for i in range(100, 111):
!wget -O skittles{i}.jpg https://github.com/possibly-wrong/skittles/blob/master/images/{i}.jpg?raw=true
'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file. 'wget' is not recognized as an internal or external command, operable program or batch file.
def imshow(a):
    """Display an array as an image, converting OpenCV BGR(A) order to RGB(A)."""
    img = a.clip(0, 255).astype("uint8")
    if img.ndim == 3:
        # Pick the conversion that matches the channel count.
        code = cv2.COLOR_BGRA2RGBA if img.shape[2] == 4 else cv2.COLOR_BGR2RGB
        img = cv2.cvtColor(img, code)
    display(PIL.Image.fromarray(img))
# CSS injected ahead of DataFrame HTML so tables render as compact,
# uniform, centred grids.
# Fix: CSS has no `//` line comments; the old `//border: double;` was an
# invalid token sequence -- use a proper `/* */` comment instead.
css = """
<style type="text/css">
table, td, table.dataframe, table.dataframe td {
  border: 1px solid black; /* alternative: border: double; */
  border-collapse: collapse;
  border-style: solid;
  border-spacing: 0px;
  background-color: rgb(250,250,250);
  width: 18px;
  height: 18px;
  text-align: center;
  transform: scale(1.0);
  margin: 2px;
}
</style>
"""
def h(s):
    """Render *s* as an HTML table (no header/index), styled by the notebook CSS."""
    table_html = DataFrame(s).to_html(header=False, index=False)
    return display(HTML(css + table_html))
def h_color(a, cmap="gray", scale=2):
    """Show array *a* as a scaled matplotlib image with all ticks hidden."""
    plt.figure(figsize=[a.shape[0] * scale, a.shape[1] * scale])
    # Suppress every tick mark and tick label on both axes.
    hidden = dict(
        axis="both",
        which="both",
        bottom=False,
        top=False,
        left=False,
        right=False,
        labelbottom=False,
        labelleft=False,
    )
    plt.tick_params(**hidden)
    plt.imshow(a, cmap=cmap)
# Discrete 10-colour palette for visualizing segment labels (label 0 = black).
cmap = ListedColormap(
    ["black", "tomato", "chocolate", "darkorange", "gold",
     "olive", "green", "deepskyblue", "blueviolet", "hotpink"]
)
def h_grid(grid, scale=1):
    """Display an integer label grid using the discrete segment palette `cmap`."""
    h_color(grid, cmap, scale)
def pix_show(pixels, skip_each=1, height=800, width=800, colors=None):
    """Scatter BGR pixels in 3D intensity space with plotly.

    pixels : (N, 3) array of BGR values; every ``skip_each``-th pixel is drawn.
    colors : optional per-pixel marker colors (subsampled the same way);
             defaults to the pixels themselves reversed to RGB order.
    """
    pts = pixels[::skip_each]
    if colors is None:
        marker_colors = pts[:, ::-1]  # BGR -> RGB for display
    else:
        marker_colors = colors[::skip_each]
    scatter = go.Scatter3d(
        x=pts[:, 0],  # blue channel
        y=pts[:, 1],  # green channel
        z=pts[:, 2],  # red channel
        mode="markers",
        marker={"size": 2, "color": marker_colors, "opacity": 0.7},
    )
    fig = go.Figure(
        data=[scatter],
        layout_xaxis_range=[0, 1],
        layout_yaxis_range=[0, 1],
    )
    axis_titles = {
        "xaxis": dict(title="Blue"),
        "yaxis": dict(title="Green"),
        "zaxis": dict(title="Red"),
    }
    fig.update_layout(
        autosize=False,
        height=height,
        width=width,
        scene=axis_titles,
        showlegend=True,
    )
    fig.show()
Image segmentation is a crucial technique in computer vision that involves partitioning an image into distinct regions or segments, each representing a meaningful part of the image. By classifying pixels into categories, segmentation helps in understanding the structure and content of an image, making it essential for applications such as object detection, medical imaging, autonomous driving, and image editing.
There are two primary types of image segmentation:
The main techniques of classical semantic segmentation:
The easiest way to segment an image is to perform a pixel intensity thresholding. The thresholding operation consists in replacing all intensities above a certain threshold with a certain constant value, and with another value below this threshold.
There is also segmentation with multiple thresholds, which was presented in the first class.
# Load the Lena test image (BGR) and display it.
lena = cv2.imread("./lena_std.tif", cv2.IMREAD_COLOR)
imshow(lena)
# Grayscale version used by the thresholding experiments below.
lena_gray = cv2.cvtColor(lena, cv2.COLOR_BGR2GRAY)
imshow(lena_gray)
# Two-threshold LUT (256 entries): 0-99 -> 255, 100-199 -> 0, 200-255 -> 255.
lut = np.array([255] * 100 + [0] * 100 + [255] * 56)
lena_lut = cv2.LUT(lena_gray, lut)
imshow(lena_lut)
The OpenCV library also includes a ready-made implementation of other simple image thresholding approaches. To perform the thresholding operation in OpenCV, the threshold() function should be called, which takes the image, the threshold value, the maximum value and the threshold method that should be used.
The available thresholding methods include:
# The five basic cv2.threshold modes, all with thresh=127, maxval=255:
#   BINARY:     src > 127 -> 255, else 0
#   BINARY_INV: src > 127 -> 0,   else 255
#   TRUNC:      src > 127 -> 127, else unchanged
#   TOZERO:     src > 127 -> unchanged, else 0
#   TOZERO_INV: src > 127 -> 0,   else unchanged
_, lena_bin = cv2.threshold(lena_gray, 127, 255, cv2.THRESH_BINARY)
_, lena_bin_inv = cv2.threshold(lena_gray, 127, 255, cv2.THRESH_BINARY_INV)
_, lena_trunc = cv2.threshold(lena_gray, 127, 255, cv2.THRESH_TRUNC)
_, lena_tozero = cv2.threshold(lena_gray, 127, 255, cv2.THRESH_TOZERO)
_, lena_tozero_inv = cv2.threshold(lena_gray, 127, 255, cv2.THRESH_TOZERO_INV)
imshow(np.concatenate([lena_gray, lena_bin, lena_bin_inv], 1))
imshow(np.concatenate([lena_trunc, lena_tozero, lena_tozero_inv], 1))
sudoku = cv2.imread("./sudoku.png", cv2.IMREAD_GRAYSCALE)
imshow(sudoku)
print(sudoku.shape)
(563, 558)
# Plain binary threshold at 70 for comparison with the LUT below.
_, sudoku_bin = cv2.threshold(sudoku, 70, 255, cv2.THRESH_BINARY)
imshow(sudoku_bin)
# LUT equivalent of cv2.threshold(sudoku, 70, 255, THRESH_BINARY).
# THRESH_BINARY maps src > 70 -> 255 and src <= 70 -> 0, so the LUT needs
# 71 zeros (indices 0..70) followed by 185 entries of 255 (indices 71..255).
# Fix: the original 70/186 split mapped intensity 70 to 255, disagreeing
# with the threshold result at exactly 70.
lut = np.array([0] * 71 + [255] * 185)
sudoku_lut = cv2.LUT(sudoku, lut)
imshow(sudoku_lut)
# Band threshold: 0-59 -> 0, 60-129 -> 255, 130-255 -> 0
# (the original comment claimed a 50/130 split that did not match the code).
lut = np.array([0] * 60 + [255] * 70 + [0] * 126)
sudoku_lut = cv2.LUT(sudoku, lut)
imshow(sudoku_lut)
# Wider band: 0-49 -> 0, 50-129 -> 255, 130-255 -> 0
lut = np.array([0] * 50 + [255] * 80 + [0] * 126)
sudoku_lut = cv2.LUT(sudoku, lut)
imshow(sudoku_lut)
OTSU is an algorithm designed to dynamically determine the threshold value, ensuring that the pixel intensity variance within both resulting classes is minimized, effectively maximizing the inter-class variance.
The minimized function utilized by OTSU is expressed as: $$\sigma^2_w(t) = Q_1\sigma^2_1 + Q_2\sigma^2_2$$ where:
To identify the optimal threshold value, the objective is to minimize the expression across all possible values (from 0 to 255).
The initial step involves calculating probability density function (PDF) so the probability of a pixel having a specific intensity (ranging from 0 to 255). This can be achieved by determining the histogram and subsequently normalizing it. Additionally, the cumulative distribution function (CDF) will be computed to facilitate the calculation of mean and conditional variances.
def calculate_pdf_cdf(img):
    """Return the intensity PDF and CDF of an 8-bit grayscale image.

    The PDF is the normalized 256-bin histogram (probability of each
    intensity 0..255); the CDF is its cumulative sum, so cdf[-1] == 1.
    """
    # np.bincount over the flattened image is equivalent to
    # cv2.calcHist([img], [0], None, [256], [0, 256]) for uint8 input,
    # but drops the OpenCV dependency and keeps float64 precision
    # (calcHist returns float32 counts).
    hist = np.bincount(img.ravel(), minlength=256)
    pdf = hist / hist.sum()
    cdf = np.cumsum(pdf)
    return pdf, cdf
pdf, cdf = calculate_pdf_cdf(sudoku)
plt.plot(pdf)
plt.show()
plt.plot(cdf)
plt.show()
In a single iteration of the OTSU algorithm, where a specific threshold is set, the algorithm divides the probability distribution into the probabilities of pixel occurrences in both classes. The determination of the mean value involves the use of the formula for the conditional expected value (because we are calculating the average of pixels under the condition that a specific class occurs, expressed as the: ''expected value for a given class'').
$$ E(C_1 | t_1) = \sum_{x =1}^{t_1} \frac{xP(x)}{Q_1}$$Then, to calculate the (conditional!) variances, we use the previously calculated conditional expected values. $$\sigma^2_1 = \sum_{x =1}^{t_1} \frac{(x - E[C_1|t_1])^2P(x)}{Q_1}$$
def cond_mean(x, p, q):
    """Conditional expectation E[x | class]: sum over x of x * P(x) / Q."""
    return (x * p / q).sum()
def otsu_one_step(x, pdf, cdf, epsilon=1e-6):
    """
    Evaluate Otsu's objective (weighted within-class variance) for one
    candidate split point *x*.

    Intensities 0..x-1 form the low class and x..255 the high class.
    Returns Q1*var1 + Q2*var2, or None when either class is (nearly)
    empty, which would make the conditional statistics undefined.
    """
    # x == 0 would leave the low class empty (and cdf[x - 1] would wrap).
    if x <= 0:
        return None
    # Intensity values on each side of the split
    x_low, x_high = np.arange(x), np.arange(x, 256)
    # Probability mass of each intensity on each side
    p_low, p_high = pdf[:x], pdf[x:]
    # Class probabilities Q1 and Q2.
    # Fix: cdf[x - 1] == sum(pdf[:x]) matches the p_low slice; the
    # original cdf[x] wrongly included pdf[x], which belongs to the high
    # class, skewing both conditional means and variances.
    q_low = cdf[x - 1]
    q_high = cdf[-1] - cdf[x - 1]
    # Guard against division by (near-)zero class probabilities
    if q_low < epsilon or q_high < epsilon:
        return None
    # Conditional means of the two classes
    m_low, m_high = cond_mean(x_low, p_low, q_low), cond_mean(x_high, p_high, q_high)
    # Conditional variances of the two classes
    s_low = cond_mean((x_low - m_low) ** 2, p_low, q_low)
    s_high = cond_mean((x_high - m_high) ** 2, p_high, q_high)
    # Weighted within-class variance -- the quantity Otsu minimizes
    return s_low * q_low + s_high * q_high
In the most basic version of the OTSU algorithm, we iterate over all possible divisions (thresholds from 1 to 253) and select the one for which the previously presented objective function returns the smallest value.
def otsu(img):
    # Exhaustive Otsu: evaluate the within-class variance at every
    # candidate split and keep the split with the smallest value.
    pdf, cdf = calculate_pdf_cdf(img)
    v_min = None
    threshold = 0
    # NOTE(review): range(1, 254) tests splits 1..253 only; splits 254 and
    # 255 are never evaluated -- range(1, 256) would be exhaustive.
    for i in range(1, 254):
        v = otsu_one_step(i, pdf, cdf)
        if v is not None and (v_min is None or v < v_min):
            v_min = v
            threshold = i + 1
    # NOTE(review): threshold = i + 1 looks off by one relative to
    # cv2.threshold semantics (dst = maxval when src > thresh); the printed
    # results later in the notebook differ from OpenCV's Otsu by 1 for the
    # Lena image (118 vs 117) -- confirm the intended mapping.
    _, img_otsu = cv2.threshold(img, threshold, 255, cv2.THRESH_BINARY)
    return threshold, img_otsu
In order to use the OTSU method in OpenCV, just add THRESH_OTSU to the thresholding operation.
th_self, sudoku_otsu_self = otsu(sudoku)
th_auto, sudoku_otsu_auto = cv2.threshold(
sudoku, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
)
print(
"Found value of thresholding with the OTSU algorithm (custom implementation):",
th_self,
)
print("Found value of thresholding with the OTSU algorithm (OpenCV):", th_auto)
print("\nOTSU (custom implementation):")
imshow(sudoku_otsu_self)
print("\nOTSU (OpenCV):")
imshow(sudoku_otsu_auto)
Found value of thresholding with the OTSU algorithm (custom implementation): 96 Found value of thresholding with the OTSU algorithm (OpenCV): 96.0 OTSU (custom implementation):
OTSU (OpenCV):
th_self, lena_otsu_self = otsu(lena_gray)
th_auto, lena_otsu_auto = cv2.threshold(
lena_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
)
print(
"Found value of thresholding with the OTSU algorithm (custom implementation):",
th_self,
)
print("Found value of thresholding with the OTSU algorithm (OpenCV):", th_auto)
print("\nOTSU (custom implementation):")
imshow(lena_otsu_self)
print("\nOTSU (OpenCV):")
imshow(lena_otsu_auto)
Found value of thresholding with the OTSU algorithm (custom implementation): 118 Found value of thresholding with the OTSU algorithm (OpenCV): 117.0 OTSU (custom implementation):
OTSU (OpenCV):
Among the thresholding methods, there are also adaptive methods. These are thresholding methods that adjust the threshold value depending on the image content.
Adaptive thresholding methods often work very well when the input image is divided into smaller areas and the threshold value is adjusted separately for each area. The motivation behind such a mechanism is the fact that in real images the lighting (as well as focus, balance, etc.) is uneven.
# Adaptive thresholding with a large (101x101) neighbourhood: the local
# threshold is the neighbourhood mean (left) or a Gaussian-weighted mean
# (right), minus the constant.
blockSize = 101
constant = 2
lena_ad_mean = cv2.adaptiveThreshold(
    lena_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, blockSize, constant
)
lena_ad_gauss = cv2.adaptiveThreshold(
    lena_gray,
    255,
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY,
    blockSize,
    constant,
)
imshow(np.concatenate([lena_ad_mean, lena_ad_gauss], 1))
# Same experiment with a tiny (3x3) neighbourhood -- the result behaves
# more like an edge detector than a segmentation.
blockSize = 3
constant = 2
lena_ad_mean = cv2.adaptiveThreshold(
    lena_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, blockSize, constant
)
lena_ad_gauss = cv2.adaptiveThreshold(
    lena_gray,
    255,
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY,
    blockSize,
    constant,
)
imshow(np.concatenate([lena_ad_mean, lena_ad_gauss], 1))
Segmenting multi-channel images, such as RGB, using straightforward thresholding methods becomes challenging due to the necessity of defining thresholds in N-dimensional space. As a result, cluster analysis methods are frequently employed instead of simplistic thresholding for the segmentation of multi-channel images.
Cluster analysis involves identifying clusters of pixels within a specific space, sometimes directly in the intensity space. This process entails creating distinct pixel classes based on their characteristics within that space.
To illustrate, let's conduct a basic pixel intensity analysis on the given image below.
graf = cv2.imread("./graf.png", cv2.IMREAD_COLOR)
graf = cv2.resize(graf, None, fx=0.75, fy=0.75)
imshow(graf)
The image presented as a list of pixels (BGR) was displayed in 3D space, where the coordinates of a given pixel are its intensity values. Additionally, the pixels have been colored according to their intensities.
graf_pixels = graf.reshape([-1, 3])
pix_show(graf_pixels, 16)
From the analysis of the above visualization, a few conclusions can be drawn:
One method of splitting spaces into clusters is Gaussian Mixture, which approximates the distribution of clusters using N Gaussian distributions. It is a method with training parameters, so there is a need for a certain sample of data to which we could fit a mathematical model.
We will use a pixel list (BGR) as the data to which we will fit the model, and then assign each of them a number of Gaussian systems to which it belongs with the highest probability.
# Fit an 8-component Gaussian Mixture to the BGR pixel cloud.
# NOTE(review): no random_state is set, so the component numbering (and
# hence the exact segmentation) can change between runs -- confirm whether
# reproducibility matters here.
model = GaussianMixture(n_components=8)
model.fit(graf_pixels)
# Assign each pixel the id of its most probable Gaussian component.
segments = model.predict(graf_pixels)
print(segments)
[4 4 4 ... 0 0 0]
The next step will be to calculate the average color for each class (segment) and display the pixels again with the colors representing the segmentation.
# Mean BGR colour of each of the 8 clusters.
segments_colors = np.stack([graf_pixels[segments == i].mean(0) for i in range(8)], 0)
# Expand cluster means back to one colour per pixel via the label array.
colors = np.take(segments_colors, segments, 0)
# Redraw the pixel cloud coloured by cluster mean (BGR reversed to RGB).
pix_show(graf_pixels, 16, colors=colors[:, ::-1])
Pixels with mapped classes are the final image segmentation based on cluster analysis. The input image and the segmentation effect are shown below.
segmented = colors.reshape(graf.shape)
imshow(np.concatenate([graf, segmented], 1))
Segmentation through edge detection is based on the knowledge learned in the previous class as part of detecting key points, corners and edges.
The idea is to divide the image on the basis of edges, and then fill closed areas, assigning subsequent identifiers to subsequent separable areas.
The assignment of identifier areas is presented in the next section.
# Load and downscale the CLEVR image, then prepare a grayscale version.
clevr = cv2.imread("./clevr.jpg", cv2.IMREAD_COLOR)
clevr = cv2.resize(clevr, None, fx=0.5, fy=0.5)
imshow(clevr)
clevr_gray = cv2.cvtColor(clevr, cv2.COLOR_BGR2GRAY)
imshow(clevr_gray)
# Use Otsu's threshold as the high Canny threshold and half of it as low.
canny_high, _ = cv2.threshold(clevr_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
canny_low = 0.5 * canny_high
# NOTE(review): the positional 9 is passed into cv2.Canny's `edges`
# (output) slot, not apertureSize -- and valid aperture sizes are 3/5/7
# anyway. Presumably it has no effect here; confirm intent.
clevr_canny = cv2.Canny(clevr_gray, canny_low, canny_high, 9)
# Morphological closing to seal small gaps so edges enclose regions.
clevr_canny = cv2.morphologyEx(
    clevr_canny, cv2.MORPH_CLOSE, kernel=np.ones((3, 3), np.uint8)
)
imshow(clevr_canny)
Segmentation by region growing consists in iteratively joining adjacent areas until a certain condition is met. The areas are joined after meeting the uniformity test, while the algorithm is executed until the stop condition is met.
Uniformity test - combining areas and checking a certain condition. An example condition might be: the average difference of pixel intensity in both areas. If it is greater than a certain threshold, then the areas are not uniform and there is no connection.
Stop condition - this condition can be treated as no further merges of areas or as a condition for early stopping of the algorithm (e.g. when we want the areas not to be larger than a set limit).
For region growing, previously detected edges can be used as region boundaries, so the stop condition means that all pixels inside the area surrounded by edges belong to the region.
# Region-label map (int32, 0 = unassigned) and the 4-connectivity offsets
# (up, down, left, right) used by the region-growing code below.
regions = np.zeros(clevr_canny.shape[:2], np.int32)
neighbours = [(-1, 0), (1, 0), (0, -1), (0, 1)]
def find_neighbours(img, y, x):
    """Return the unlabelled, non-edge 4-neighbours of (y, x) that pass
    the uniformity test (same intensity as the current pixel)."""
    found = []
    rows, cols = img.shape[0], img.shape[1]
    for dy, dx in neighbours:
        ny, nx = y + dy, x + dx
        if not (0 <= ny < rows and 0 <= nx < cols):
            continue  # outside the image
        if regions[ny, nx] > 0:
            continue  # already assigned to a region
        if img[ny, nx] == 255:
            continue  # edge pixel -- acts as a region boundary
        if img[y, x] == img[ny, nx]:  # uniformity test
            found.append((ny, nx))
    return found
def grow_region(img, y, x, cls):
    # Breadth-first flood fill: label the seed with `cls`, then repeatedly
    # expand the frontier through neighbours that pass the uniformity test.
    regions[y, x] = cls
    c_neighbours = find_neighbours(img, y, x)
    for ny, nx in c_neighbours:
        regions[ny, nx] = cls
    while len(c_neighbours) > 0:
        new_neighbours = []
        for ny, nx in c_neighbours:
            i_new_neighbours = find_neighbours(img, ny, nx)
            # Label immediately so later find_neighbours calls within this
            # same pass already see these pixels as assigned.
            for _ny, _nz in i_new_neighbours:
                regions[_ny, _nz] = cls
            new_neighbours.extend(i_new_neighbours)
        c_neighbours = new_neighbours
# Flood-fill every unlabelled non-edge pixel, handing out region ids 1..i-1.
i = 1
for y in range(clevr_canny.shape[0]):
    for x in range(clevr_canny.shape[1]):
        if regions[y, x] == 0 and clevr_canny[y, x] == 0:
            grow_region(clevr_canny, y, x, i)
            i += 1
# Colour each region with its mean BGR colour; label 0 (edge pixels that
# were never assigned to a region) is drawn white.
mean_colors = np.stack(
    [
        np.array([255, 255, 255]) if j == 0 else clevr[regions == j].mean(0)
        for j in range(i)
    ],
    0,
)
regions_colors = np.take(mean_colors, regions, 0)
imshow(regions_colors)
In this case (detected edges), a similar result can also be obtained using connected component.
# Same segmentation, this time via connected-component labelling on the
# inverted edge map.
kernel = np.ones((3, 3), np.uint8)
# Thicken the edges slightly so neighbouring regions stay separated.
edges = cv2.dilate(clevr_canny, kernel, iterations=1)
# Label the non-edge pixels; component 0 is the (edge) background.
ret, markers = cv2.connectedComponents(255 - edges)
# Bug fix: colour by the freshly computed `markers` labels instead of
# `regions` left over from the region-growing cell. Mixing the two label
# maps produced empty selections (see the "Mean of empty slice"
# RuntimeWarning in the original output).
mean_colors = np.stack(
    [
        np.array([255, 255, 255]) if j == 0 else clevr[markers == j].mean(0)
        for j in range(ret)
    ],
    0,
)
regions_colors = np.take(mean_colors, markers, 0)
imshow(regions_colors)
C:\Users\mmart\AppData\Local\Temp\ipykernel_22948\1053265544.py:7: RuntimeWarning: Mean of empty slice. c:\Users\mmart\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\core\_methods.py:121: RuntimeWarning: invalid value encountered in divide
Like the section on multi-channel image segmentation, perform the same pixel intensity cluster analysis for the './skittles100.jpg' image and then segment the image using the K-Means algorithm (available, among others, in the scikit-learn library as sklearn.cluster.KMeans).
Present the intermediate results:
# BGR input image
skittles1 = cv2.imread("./skittles100.jpg", cv2.IMREAD_COLOR)
skittles1 = cv2.resize(skittles1, None, fx=0.75, fy=0.75)
print("Skittles input image in the BGR format:")
imshow(skittles1)
Skittles input image in the BGR format:
# BGR pixels in 3D space
skittles1_pixels = skittles1.reshape([-1, 3])
print("Skittles image's pixels in the 3D space, coordintes are the intensities of the BGR channels:")
pix_show(skittles1_pixels, 16)
Skittles image's pixels in the 3D space, coordintes are the intensities of the BGR channels:
# segmentation result on BGR pixels in 3D space using Kmeans
#first using the elbow method i will determine the optimal number of clusters
from sklearn.cluster import KMeans
def elbow_method(data, k_range, random_state=None):
    """Plot the KMeans inertia for each k in *k_range* (elbow heuristic).

    Parameters
    ----------
    data         : (n_samples, n_features) array to cluster.
    k_range      : iterable of cluster counts to try.
    random_state : optional seed forwarded to KMeans so the inertia curve
                   is reproducible; the default (None) keeps the original
                   nondeterministic behaviour.
    """
    inertia = []
    for k in k_range:
        kmeans = KMeans(n_clusters=k, random_state=random_state)
        kmeans.fit(data)
        # inertia_: sum of squared distances of samples to their centroid.
        inertia.append(kmeans.inertia_)
    plt.figure(figsize=(8, 4))
    plt.plot(k_range, inertia, marker='o')
    plt.xlabel('Number of clusters (k)')
    plt.ylabel('Inertia')
    plt.title('Elbow Method for Optimal k')
    plt.grid(True)
    plt.show()
elbow_method(skittles1_pixels, range(1, 10))
Unfortunately, the elbow method did not provide me with a satisfying k. I know that 4 clusters will not be enough, because the skittles1 picture contains more than 4 colours of skittles (5, to be exact); because of that I have chosen not the elbow point but k = 7, since it seems to correspond well with the picture.
from sklearn.cluster import KMeans
# segmentation result on BGR pixels in 3D space using Kmeans
# Perform KMeans clustering
kmeans = KMeans(n_clusters=7, random_state=0)
kmeans.fit(skittles1_pixels)
segments = kmeans.labels_
# Assigning the colours to segments
segments_colors = kmeans.cluster_centers_
colors = np.take(segments_colors, segments, axis=0)
# Visualization of the KMeans clustering result with k=7
pix_show(skittles1_pixels, 16, colors=colors[:, ::-1])
# segmentation result as a 2D image (BGR)
segmented = colors.reshape(skittles1.shape)
print("Segmentation result as a 2D image (BGR) after using kmeans with 7 clusters")
imshow(segmented)
Segmentation result as a 2D image (BGR) after using kmeans with 7 clusters
print("The input skittles and the segmented image using kmeans")
imshow(np.concatenate([skittles1, segmented], 1))
The input skittles and the segmented image using kmeans
Using the methods you learned in the previous class, find the number of Skittles in the image './skittles100.jpg'. (it is not necessary to use the solution from task 1) Show intermediate results and describe the processing steps in the comment.
Show the original image with founded individual skittles marked on it.
# Reading the initial image- skittles100
skittles100 = cv2.imread('./skittles100.jpg')
print("Original image- skittles100:")
imshow(cv2.resize(skittles100, None, fx=0.4, fy=0.4))
Original image- skittles100:
# Converting image to the HSV color space- thanks to that it will be easier to get the
# skittles separated from the background
hsv_img = cv2.cvtColor(skittles100, cv2.COLOR_BGR2HSV)
print("Image converted to the HSV colour space:")
imshow(cv2.resize(hsv_img, None, fx=0.4, fy=0.4))
Image converted to the HSV colour space:
# Applying colour thresholding to isolate Skittles from the background
# Ranges are chosen by me by trial and error
lower_bound = np.array([0, 50, 50])
upper_bound = np.array([179, 255, 255])
mask = cv2.inRange(hsv_img, lower_bound, upper_bound)
print("Mask after applying colour thresholding:")
imshow(cv2.resize(mask, None, fx=0.4, fy=0.4))
Mask after applying colour thresholding:
# Creating an additional mask for the black Skittles since they are not visible in the previous mask
lower_black = np.array([0, 0, 0])
upper_black = np.array([179, 255, 50])
black_mask = cv2.inRange(hsv_img, lower_black, upper_black)
print("Second mask after applying colour thresholding specificaly for the black skittles:")
imshow(cv2.resize(black_mask, None, fx=0.4, fy=0.4))
# we may see that the corners are also detected here so later on I will exclude them to ensure that they wont be
# counted as the skittles
Second mask after applying colour thresholding specificaly for the black skittles:
# Combining the masks
mask = cv2.bitwise_or(mask, black_mask)
print("Mask created by combining mask for black skittles and first one that detected colourful skittles:")
imshow(cv2.resize(mask, None, fx=0.4, fy=0.4))
Mask created by combining mask for black skittles and first one that detected colourful skittles:
#Removing the corners from the mask to ensure that they will not be detected and counted as the skittles
h, w = mask.shape
corner_size = 65 # Size of the corner to be removed
mask[:corner_size, :corner_size] = 0 # Top-left corner that was problematic
mask[:corner_size, w-corner_size:] = 0 # Top-right corner that was problematic
# mask[h-corner_size:, :corner_size] = 0 # Bottom-left corner
# mask[h-corner_size:, w-corner_size:] = 0 # Bottom-right corner
print("Mask after removing the corners:")
imshow(cv2.resize(mask, None, fx=0.4, fy=0.4))
Mask after removing the corners:
# Using dilation to fill the holes in the mask- especially in the middle of the skittles S in black ones
struct = np.ones((3, 3), np.uint8)
mask_dilated = cv2.dilate(mask, struct, iterations=3) #I chose the number of iterations with trial and error
print("Using dilation to fill the holes in the mask:")
imshow(cv2.resize(mask_dilated, None, fx=0.4, fy=0.4))
Using dilation to fill the holes in the mask:
# Eroding the mask to remove the noise and shrunk the skittles so the single ones will be detected
# here the number of iterations were also chosen by trial and error so the skittles woudnt be merged together
ellipse_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))
mask_eroded = cv2.erode(mask_dilated, ellipse_kernel, iterations=10)
print("Using erosion to shrink the skittles so they wouldn't be touching and considered as the one:")
imshow(cv2.resize(mask_eroded, None, fx=0.4, fy=0.4))
Using erosion to shrink the skittles so they wouldn't be touching and considered as the one:
# Finding the contours in eroded mask
contours, _ = cv2.findContours(mask_eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Filtering contours with the minimum area to consider a contour as a skittle
min_area = 15 # if contour area would be smaller than min_area it will be ignored; here it was also try and error
filtered_points = [cnt for cnt in contours if cv2.contourArea(cnt) > min_area] #filtering the contours with the minimum area
#Drawing the centers as a red point on detected skittles on the original image
output_image = skittles100.copy()
for i in filtered_points:
(x, y), radius = cv2.minEnclosingCircle(i)
center = (int(x), int(y))
#I have added the green circle to better show the skittles
# on the resized images because I am blind and I can't see them
radius = int(radius) + 2
cv2.circle(output_image, center, radius, (0, 255, 0), 2)
cv2.circle(output_image, center, 6, (0, 0, 255), -1)
print("Image of the skittles with each skittles marked on them:")
imshow(cv2.resize(output_image, None, fx=0.4, fy=0.4))
Image of the skittles with each skittles marked on them:
#Counting the number of detected skittles
num_skittles = len(filtered_points)
print(f"Number of skittles detected on the picture: {num_skittles}")
Number of skittles detected on the picture: 60
Here I wrote the code from above in a form of definition so i can easily apply it to the images from the third task. I didn't add comments on this code and I am not showing each step in here since it is the same code from above just put in the def
def find_and_mark_skittles(skittles_img, corner_size=65, min_area=15):
    """Count and mark Skittles in a BGR image.

    Pipeline: HSV conversion -> saturated-colour mask OR low-value (black)
    mask -> blank the two top corners (false positives from the black
    mask) -> dilate to fill holes (e.g. the printed "S") -> erode to
    separate touching candies -> external contours filtered by area.

    Parameters
    ----------
    skittles_img : BGR image, as read by cv2.imread.
    corner_size  : side length of the square blanked in both top corners
                   (default matches the tuned value from the inline cell).
    min_area     : minimum contour area to count as a Skittle.

    Returns
    -------
    (count, annotated_copy) where annotated_copy has a green enclosing
    circle and a red centre dot drawn on every detected candy.
    """
    hsv_img = cv2.cvtColor(skittles_img, cv2.COLOR_BGR2HSV)
    # Any sufficiently saturated, bright pixel (the coloured candies).
    lower_bound = np.array([0, 50, 50])
    upper_bound = np.array([179, 255, 255])
    mask = cv2.inRange(hsv_img, lower_bound, upper_bound)
    # Low-value pixels (black candies) missed by the mask above.
    lower_black = np.array([0, 0, 0])
    upper_black = np.array([179, 255, 50])
    black_mask = cv2.inRange(hsv_img, lower_black, upper_black)
    mask = cv2.bitwise_or(mask, black_mask)
    # Blank the top corners, which the black mask falsely picks up.
    h, w = mask.shape
    mask[:corner_size, :corner_size] = 0
    mask[:corner_size, w - corner_size:] = 0
    # Fill holes, then shrink blobs so touching candies separate;
    # iteration counts were tuned by trial and error.
    struct = np.ones((3, 3), np.uint8)
    mask = cv2.dilate(mask, struct, iterations=3)
    ellipse_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    mask_eroded = cv2.erode(mask, ellipse_kernel, iterations=10)
    contours, _ = cv2.findContours(mask_eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    filtered_points = [cnt for cnt in contours if cv2.contourArea(cnt) > min_area]
    output_image = skittles_img.copy()
    for cnt in filtered_points:
        (x, y), radius = cv2.minEnclosingCircle(cnt)
        center = (int(x), int(y))
        radius = int(radius)
        cv2.circle(output_image, center, radius, (0, 255, 0), 2)
        cv2.circle(output_image, center, 3, (0, 0, 255), -1)
    num_skittles = len(filtered_points)
    return num_skittles, output_image
# Run the detector on every downloaded skittles image and show the
# original next to the annotated result.
for file in glob.glob("./skittles*"):
    # print(file)
    skittles = cv2.imread(file, cv2.IMREAD_COLOR)
    # imshow(cv2.resize(skittles, None, fx=0.4, fy=0.4))
    number_of_skittles, skittles_output = find_and_mark_skittles(skittles)
    print(f"Number of skittles found on the {file}: {number_of_skittles}")
    imshow(cv2.resize(np.concatenate([skittles, skittles_output],1), None, fx=0.5, fy=0.5))
Number of skittles found on the .\skittles100.jpg: 60
Number of skittles found on the .\skittles101.jpg: 51
Number of skittles found on the .\skittles102.jpg: 61
Number of skittles found on the .\skittles103.jpg: 56
Number of skittles found on the .\skittles104.jpg: 56
Number of skittles found on the .\skittles105.jpg: 62
Number of skittles found on the .\skittles106.jpg: 54
Number of skittles found on the .\skittles107.jpg: 58
Number of skittles found on the .\skittles108.jpg: 55
Number of skittles found on the .\skittles109.jpg: 51
Number of skittles found on the .\skittles110.jpg: 61
Summing up, my approach works and detects most of the skittles in all of the images from the 3rd task. The only problem is with skittles that are very close to each other — they get merged and counted as one. Also, some of the black ones are missed, but not many.